From 5ee3c045eeb0c6cc418438e89ef18f4adbd16fb8 Mon Sep 17 00:00:00 2001 From: "kaf24@freefall.cl.cam.ac.uk" Date: Thu, 28 Oct 2004 13:03:45 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.295 (4180ee31v7apKAXQ_iCd672ndA6I0Q) New TLB-flush logic. By basing NEED_FLUSH() on the current time, as well as the CPU and page timestamps, I was able to get rid of the tedious epoch logic. We now only need special-case logic when the 32-bit clock wraps. In debug build I deliberately restrict the clock to 10 bits, so that the wrap logic gets exercised. --- xen/arch/x86/flushtlb.c | 47 +++++++++++++++++++++------------- xen/arch/x86/smp.c | 9 ++----- xen/include/asm-x86/flushtlb.h | 44 +++++++++++++++---------------- 3 files changed, 51 insertions(+), 49 deletions(-) diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c index 023fcd354c..6e50febc4f 100644 --- a/xen/arch/x86/flushtlb.c +++ b/xen/arch/x86/flushtlb.c @@ -12,7 +12,14 @@ #include #include -u32 tlbflush_clock; +/* Debug builds: Wrap frequently to stress-test the wrap logic. */ +#ifdef NDEBUG +#define WRAP_MASK (0xFFFFFFFFU) +#else +#define WRAP_MASK (0x000003FFU) +#endif + +u32 tlbflush_clock = 1U; u32 tlbflush_time[NR_CPUS]; void write_cr3(unsigned long cr3) @@ -20,38 +27,42 @@ void write_cr3(unsigned long cr3) u32 t, t1, t2; unsigned long flags; + /* This non-reentrant function is sometimes called in interrupt context. */ local_irq_save(flags); /* - * Tick the clock, which is incremented by two each time. The L.S.B. is - * used to decide who will control the epoch change, when one is required. + * STEP 1. Increment the virtual clock *before* flushing the TLB. + * If we do it after, we race other CPUs invalidating PTEs. + * (e.g., a page invalidated after the flush might get the old + * timestamp, but this CPU can speculatively fetch the mapping + * into its TLB after the flush but before inc'ing the clock). */ + t = tlbflush_clock; do { - t1 = t; /* t1: Time before this clock tick. 
*/ - t2 = t + 2; /* t2: Time after this clock tick. */ - if ( unlikely(t2 & 1) ) - { - /* Epoch change: someone else is leader. */ - t2 = t; /* no tick */ + t1 = t2 = t; + /* Clock wrapped: someone else is leading a global TLB shootdown. */ + if ( unlikely(t1 == 0) ) goto skip_clocktick; - } - else if ( unlikely((t2 & TLBCLOCK_EPOCH_MASK) == 0) ) - { - /* Epoch change: we may become leader. */ - t2--; /* half tick */ - } + t2 = (t + 1) & WRAP_MASK; } while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) ); - /* Epoch change: we are the leader. */ - if ( unlikely(t2 & 1) ) + /* Clock wrapped: we will lead a global TLB shootdown. */ + if ( unlikely(t2 == 0) ) raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ); + /* + * STEP 2. Update %CR3, thereby flushing the TLB. + */ + skip_clocktick: __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (cr3) : "memory" ); - /* Update this CPU's timestamp to new time. */ + /* + * STEP 3. Update this CPU's timestamp. + */ + tlbflush_time[smp_processor_id()] = t2; local_irq_restore(flags); diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c index a7172908bb..5ac93e22aa 100644 --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -261,15 +261,9 @@ void flush_tlb_mask(unsigned long mask) } } -/* - * NB. Must be called with no locks held and interrupts enabled. - * (e.g., softirq context). - */ +/* Call with no locks held and interrupts enabled (e.g., softirq context). */ void new_tlbflush_clock_period(void) { - /* Only the leader gets here. Noone else should tick the clock. */ - ASSERT(((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) == 0); - /* Flush everyone else. We definitely flushed just before entry. */ if ( smp_num_cpus > 1 ) { @@ -285,6 +279,7 @@ void new_tlbflush_clock_period(void) } /* No need for atomicity: we are the only possible updater. 
*/ + ASSERT(tlbflush_clock == 0); tlbflush_clock++; } diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h index 8b068e3191..cebb78ebb5 100644 --- a/xen/include/asm-x86/flushtlb.h +++ b/xen/include/asm-x86/flushtlb.h @@ -13,40 +13,36 @@ #include #include -/* - * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed. - * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock. - */ -#define TLBCLOCK_EPOCH_MASK ((1U<<20)-1) +/* The current time as shown by the virtual TLB clock. */ +extern u32 tlbflush_clock; + +/* Time at which each CPU's TLB was last flushed. */ +extern u32 tlbflush_time[NR_CPUS]; + +#define tlbflush_current_time() tlbflush_clock /* - * 'cpu_stamp' is the current timestamp for the CPU we are testing. - * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last + * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing. + * @lastuse_stamp is a timestamp taken when the PFN we are testing was last * used for a purpose that may have caused the CPU's TLB to become tainted. */ static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp) { + u32 curr_time = tlbflush_current_time(); /* - * Worst case in which a flush really is required: - * 1. CPU has not flushed since end of last epoch. - * 2. Clock has run to end of current epoch. - * THEREFORE: Maximum valid difference is (EPOCH_MASK + 1). - * N.B. The clock cannot run further until the CPU has flushed once more - * and updated to current time, so this is as 'far out' as it can get. + * Two cases: + * 1. During a wrap, the clock ticks over to 0 while CPUs catch up. For + * safety during this period, we force a flush if @curr_time == 0. + * 2. Otherwise, we look to see if @cpu_stamp <= @lastuse_stamp. + * To detect false positives because @cpu_stamp has wrapped, we + * also check @curr_time. If less than @lastuse_stamp we definitely + * wrapped, so there's no need for a flush (one is forced every wrap). 
*/ - return ((lastuse_stamp - cpu_stamp) <= (TLBCLOCK_EPOCH_MASK + 1)); + return ((curr_time == 0) || + ((cpu_stamp <= lastuse_stamp) && + (lastuse_stamp <= curr_time))); } -/* - * The least significant bit of the clock indicates whether an epoch-change - * is in progress. All other bits form the counter that is incremented on - * each clock tick. - */ -extern u32 tlbflush_clock; -extern u32 tlbflush_time[NR_CPUS]; - -#define tlbflush_current_time() tlbflush_clock - extern void new_tlbflush_clock_period(void); /* Read pagetable base. */ -- 2.30.2